# Replication file for "The Negotiation Calculus: Why Parties to Civil Conflict Refuse to Talk." 2015. International Studies Quarterly.
# Jeffrey M. Kaplow; University of California, San Diego; jkaplow@ucsd.edu

# ----
# The replication file includes the following data frames and lists:
#    dat_orig: data on negotiation and settlement drawn largely from the MAROB dataset
#    dat: five multiply imputed datasets created from dat_orig
#    ucdp: data on negotiation and settlement drawn from the UCDP/Prio Armed Conflict Dataset, the Non-State Actor dataset from Cunningham, Gleditsch, and Salehyan, and the Civil War Mediation dataset from DeRouen, Bercovitch, and Pospieszna.
#    bootresults: the results of two-stage cluster bootstrap models (replicated below)
#    predoutput: the output of a 3-fold out-of-sample prediction procedure (replicated below)

#    More detail about these data is available in the supplementary file and the data codebook.

load("NegotCalc_Replication.RData")

# Libraries ----
library(Zelig)    # v3.5.5
library(Amelia)   # v1.7.3
library(lmtest)   # v0.9-33
library(caret)    # v6.0-41
library(brglm)    # v0.5-9
library(WhatIf)   # v1.5-6
library(sandwich) # v2.3-2

# Functions ----

# disp_boot function to display model coefficients together with bootstrapped standard errors
disp_boot<-function(mat,df,coefcol=1,secol=2){
  require(lmtest)
  zvals<-mat[,coefcol]/mat[,secol]
  pvals<-2*pt(abs(zvals),df=df,lower.tail=FALSE)
  mat<-cbind(mat,zvals,pvals)
  colnames(mat)<-c("Estimate","Std. Error","z value","Pr(>|z|)")
  class(mat)<-"coeftest"		
  return(mat)
}

# robust function to calculate cluster robust standard errors
robust<-function(mod, clust){
  if(length(mod$y)!=length(clust)&!is.null(mod$na.action)) clust<-clust[-unique(mod$na.action)]
  if(length(mod$y)!=length(clust)&is.null(mod$na.action)) clust<-clust[as.numeric(names(mod$residuals))]
  M<-length(unique(clust))
  N<-length(clust)           
  K<-mod$rank
  dfc<-(M/(M - 1)) * ((N - 1)/(N - K))
  uj<-apply(estfun(mod), 2, function(x) tapply(x, clust, sum))
  rcv<-dfc * sandwich(mod, meat=crossprod(uj)/N)
  rse<-coeftest(mod, rcv)
  return(rse)
}

# rcsemi function to calculate cluster robust standard errors for models run with multiply imputed data; calls robust function, above
rcsemi <- function(mod, clust, impdat){
  require(sandwich)
  require(lmtest)
  require(Zelig)
  require(Amelia)
  zcalls <<- as.character(mod[[1]]$call)
  clustn <- clust
  bs.out <- NULL
  ses.out <- NULL
  rcse.output <- NA
  for(i in 1:length(mod)){
    mod.dat<<-as.data.frame(mod[[i]]$data)
    mod.out<-zelig(formula=zcalls[2],model=zcalls[3],data=mod.dat)
    clustcl<-which(names(impdat[[i]])==clustn)
    clust<-impdat[[i]][[clustcl]]
    rcse.output<-robust(mod=mod.out,clust=clust)
    if(is.null(bs.out)) bs.out<-rcse.output[,1] else bs.out<-rbind(bs.out,rcse.output[,1])
    if(is.null(ses.out)) ses.out<-rcse.output[,2] else ses.out<-rbind(ses.out,rcse.output[,2])
  }
  rcse.done<-mi.meld(q=bs.out,se=ses.out)
  rcse.done<-cbind(rcse.done$q.mi[1,],rcse.done$se.mi[1,])
  rcse.done<-cbind(rcse.done,(rcse.done[,1]/rcse.done[,2]),(2*pt(abs(rcse.done[,1]/rcse.done[,2]), df=df.residual(mod[[1]]),lower.tail=FALSE)))
  colnames(rcse.done)<-c("Estimate","Std. Error","z value","Pr(>|z|)")
  class(rcse.done)<-"coeftest"
  rm(list=c("zcalls","mod.dat"),envir=.GlobalEnv)
  return(rcse.done)
}

# Estimate Pr(Settlement | Negotiation) ----

# Limit data to negotiation observations prior to a peace agreement
dat_negot<-dat
for(i in 1:dat_negot$m){
  negot_obs<-which(dat_negot[[1]][[i]]$negot==1&dat_negot[[1]][[i]]$postpa==0)
  dat_negot[[1]][[i]]<-dat_negot[[1]][[i]][negot_obs,]
}

# Build first-stage model
mod_settlement<-zelig(pa_year ~ terrgoal + numopps + outsup + milwing + ORGREB + guer + legal + polity + politysquared + vioyrs + peaceyrs + peaceyrs2 + peaceyrs3, data=dat_negot$imputations,model="probit")

# Calculate pr(settlement | negotiation)
for(i in 1:dat$m){
  dat[[1]][[i]]$negset.predmi<-NA
  pred_settlement<-predict(mod_settlement[i],newdata=dat[[1]][[i]],type="response",se.fit=TRUE)
  dat[[1]][[i]]$negset.predmi<-pred_settlement[[1]]$fit
}

# Bootstrapped standard errors for main results ----
# NOTE: Computing bootstrapped standard errors for the models used in the article takes a significant amount of time to complete. The output from this process is included in the enclosed replication files. To substitute the included bootstrapped standard errors instead of computing them locally, skip to the next section.

# clustresamp -- resample clusters in a dataset with replacement
clustresamp<-function(dat,clustcol,retdat=TRUE,retind=FALSE){
  uclust<-unique(dat[,clustcol])
  clustsamp<-uclust[sample(1:length(uclust),length(uclust),replace=TRUE)]
  clustfreqs<-table(clustsamp)
  clustdat<-NULL
  samp<-NULL
  sampind<-NULL
  for(a in 1:length(clustfreqs)){
    samp<-which(as.character(dat[,clustcol])==names(clustfreqs)[a])
    cdat<-dat[samp,]
    for(b in 1:clustfreqs[a]){
      sampind<-c(sampind,samp)
      clustdat<-rbind(clustdat,cdat)
    }
  }
  if(retdat==TRUE&retind==FALSE) return(clustdat)
  if(retdat==FALSE&retind==TRUE) return(sampind)
  if(retdat==TRUE&retind==TRUE) return(list("dat"=clustdat,"ind"=sampind))
}

# clustbootmi -- two-stage bootstrap for multiply imputed data
clustbootmi<-function(dat,clustcol,runs=2,robust=FALSE){
  bootmi.out<-as.data.frame(matrix(NA,nrow=runs,ncol=13))
  bootmi.out.nopred<-as.data.frame(matrix(NA,nrow=runs,ncol=12))
  bootmi.out.h1<-as.data.frame(matrix(NA,nrow=runs,ncol=7))
  bootmi.out.h2<-as.data.frame(matrix(NA,nrow=runs,ncol=7))
  bootmi.out.h3<-as.data.frame(matrix(NA,nrow=runs,ncol=7))
  bootmi.out.h4<-as.data.frame(matrix(NA,nrow=runs,ncol=8))
  bootmi.out.h5<-as.data.frame(matrix(NA,nrow=runs,ncol=7))
  
  se.out<-as.data.frame(matrix(NA,nrow=runs,ncol=13))
  se.out.nopred<-as.data.frame(matrix(NA,nrow=runs,ncol=12))
  se.out.h1<-as.data.frame(matrix(NA,nrow=runs,ncol=7))
  se.out.h2<-as.data.frame(matrix(NA,nrow=runs,ncol=7))
  se.out.h3<-as.data.frame(matrix(NA,nrow=runs,ncol=7))
  se.out.h4<-as.data.frame(matrix(NA,nrow=runs,ncol=8))
  se.out.h5<-as.data.frame(matrix(NA,nrow=runs,ncol=7))
  
  n<-rep(NA,runs)
  for(i in 1:runs){
    cat(i, "of",runs,"\r") 
    flush.console()
    bdat <- dat
    bdatneg <- bdat
    bind <- clustresamp(dat[[1]][[1]],clustcol,retdat=FALSE,retind=TRUE)
    for(j in 1:dat$m){
      bdat[[1]][[j]]<-bdat[[1]][[j]][bind,]
      bdatneg[[1]][[j]]<-bdatneg[[1]][[j]][bind,]
      bdatneg[[1]][[j]]<-bdatneg[[1]][[j]][which(bdatneg[[1]][[j]]$negot==1),]
    }
    
    # first stage
    sup<-capture.output( b.mod.negsetmi<-zelig(pa_year ~ terrgoal + numopps + outsup + milwing + ORGREB + guer + legal + polity + politysquared + vioyrs + peaceyrs + peaceyrs2 + peaceyrs3, data=bdatneg$imputations,model="probit") )
    for(j in 1:bdatneg$m){
      b.negset.predmi<-predict(b.mod.negsetmi[[j]],newdata=bdat[[1]][[j]],type="response",se.fit=TRUE)
      bdat[[1]][[j]]$b.negset.predmi<-b.negset.predmi$fit
    }

    # second stage
    sup<-capture.output( b.modmi<-zelig(negot ~ maropps + outsup2 + RELORG + yearsest + legit2 + fract + ORGREB + terrcont + b.negset.predmi + nonnegotyrs + nonnegotyrs2 + nonnegotyrs3, model="probit", data=bdat$imputations) )
    if(robust==TRUE) sup<-capture.output( b.modmi.rcse<-rcsemi(b.modmi,clust="orgId",impdat=bdat$imputations) )
    
    sup<-capture.output( b.modmi.nopred<-zelig(negot ~ maropps + outsup2 + RELORG + yearsest + legit2 + fract + ORGREB + terrcont + nonnegotyrs + nonnegotyrs2 + nonnegotyrs3, model="probit", data=bdat$imputations) )
    if(robust==TRUE) sup<-capture.output( b.modmi.nopred.rcse<-rcsemi(b.modmi.nopred,clust="orgId",impdat=bdat$imputations) )
    
    sup<-capture.output( b.modmi.h1<-zelig(negot ~ maropps + ORGREB + terrcont + nonnegotyrs + nonnegotyrs2 + nonnegotyrs3, model="probit", data=bdat$imputations) )
    if(robust==TRUE) sup<-capture.output( b.modmi.h1.rcse<-rcsemi(b.modmi.h1,clust="orgId",impdat=bdat$imputations) )
    
    sup<-capture.output( b.modmi.h2<-zelig(negot ~ outsup2 + ORGREB + terrcont + nonnegotyrs + nonnegotyrs2 + nonnegotyrs3, model="probit", data=bdat$imputations) )
    if(robust==TRUE) sup<-capture.output( b.modmi.h2.rcse<-rcsemi(b.modmi.h2,clust="orgId",impdat=bdat$imputations) )
    
    sup<-capture.output( b.modmi.h3<-zelig(negot ~ RELORG + ORGREB + terrcont + nonnegotyrs + nonnegotyrs2 + nonnegotyrs3, model="probit", data=bdat$imputations) )
    if(robust==TRUE) sup<-capture.output( b.modmi.h3.rcse<-rcsemi(b.modmi.h3,clust="orgId",impdat=bdat$imputations) )
    
    sup<-capture.output( b.modmi.h4<-zelig(negot ~ yearsest + legit2 + ORGREB + terrcont + nonnegotyrs + nonnegotyrs2 + nonnegotyrs3, model="probit", data=bdat$imputations) )
    if(robust==TRUE) sup<-capture.output( b.modmi.h4.rcse<-rcsemi(b.modmi.h4,clust="orgId",impdat=bdat$imputations) )
    
    sup<-capture.output( b.modmi.h5<-zelig(negot ~ fract + ORGREB + terrcont + nonnegotyrs + nonnegotyrs2 + nonnegotyrs3, model="probit", data=bdat$imputations) )
    if(robust==TRUE) sup<-capture.output( b.modmi.h5.rcse<-rcsemi(b.modmi.h5,clust="orgId",impdat=bdat$imputations) )
    
    # capture output
    bootmi.out[i,]<-summary(b.modmi)$coefficients[,1]
    bootmi.out.nopred[i,]<-summary(b.modmi.nopred)$coefficients[,1]
    bootmi.out.h1[i,]<-summary(b.modmi.h1)$coefficients[,1]
    bootmi.out.h2[i,]<-summary(b.modmi.h2)$coefficients[,1]
    bootmi.out.h3[i,]<-summary(b.modmi.h3)$coefficients[,1]
    bootmi.out.h4[i,]<-summary(b.modmi.h4)$coefficients[,1]
    bootmi.out.h5[i,]<-summary(b.modmi.h5)$coefficients[,1]
    
    if(robust==FALSE) se.out[i,]<-summary(b.modmi)$coefficients[,2] else se.out[i,]<-b.modmi.rcse[,2]
    if(robust==FALSE) se.out.nopred[i,]<-summary(b.modmi.nopred)$coefficients[,2] else se.out.nopred[i,]<-b.modmi.nopred.rcse[,2]
    if(robust==FALSE) se.out.h1[i,]<-summary(b.modmi.h1)$coefficients[,2] else se.out.h1[i,]<-b.modmi.h1.rcse[,2]
    if(robust==FALSE) se.out.h2[i,]<-summary(b.modmi.h2)$coefficients[,2] else se.out.h2[i,]<-b.modmi.h2.rcse[,2]
    if(robust==FALSE) se.out.h3[i,]<-summary(b.modmi.h3)$coefficients[,2] else se.out.h3[i,]<-b.modmi.h3.rcse[,2]
    if(robust==FALSE) se.out.h4[i,]<-summary(b.modmi.h4)$coefficients[,2] else se.out.h4[i,]<-b.modmi.h4.rcse[,2]
    if(robust==FALSE) se.out.h5[i,]<-summary(b.modmi.h5)$coefficients[,2] else se.out.h5[i,]<-b.modmi.h5.rcse[,2]
    
    n[i]<-b.modmi[[1]]$df.null+1
  }
  names(bootmi.out)<-names(summary(b.modmi)$coefficients[,1])
  names(bootmi.out.nopred)<-names(summary(b.modmi.nopred)$coefficients[,1])
  names(bootmi.out.h1)<-names(summary(b.modmi.h1)$coefficients[,1])
  names(bootmi.out.h2)<-names(summary(b.modmi.h2)$coefficients[,1])
  names(bootmi.out.h3)<-names(summary(b.modmi.h3)$coefficients[,1])	
  names(bootmi.out.h4)<-names(summary(b.modmi.h4)$coefficients[,1])
  names(bootmi.out.h5)<-names(summary(b.modmi.h5)$coefficients[,1])
  
  ret.out<-list("coefficients"=colMeans(bootmi.out),"coefficients.nopred"=colMeans(bootmi.out.nopred),"coefficients.h1"=colMeans(bootmi.out.h1),"coefficients.h2"=colMeans(bootmi.out.h2),"coefficients.h3"=colMeans(bootmi.out.h3),"coefficients.h4"=colMeans(bootmi.out.h4),"coefficients.h5"=colMeans(bootmi.out.h5),"se"=apply(bootmi.out,2,sd),"se.nopred"=apply(bootmi.out.nopred,2,sd),"se.h1"=apply(bootmi.out.h1,2,sd),"se.h2"=apply(bootmi.out.h2,2,sd),"se.h3"=apply(bootmi.out.h3,2,sd),"se.h4"=apply(bootmi.out.h4,2,sd),"se.h5"=apply(bootmi.out.h5,2,sd),"se.orig"=se.out,"se.orig.nopred"=se.out.nopred,"se.orig.h1"=se.out.h1,"se.orig.h2"=se.out.h2,"se.orig.h3"=se.out.h3,"se.orig.h4"=se.out.h4,"se.orig.h5"=se.out.h5,"n"=n,"t"=bootmi.out,"t.nopred"=bootmi.out.nopred,"t.h1"=bootmi.out.h1,"t.h2"=bootmi.out.h2,"t.h3"=bootmi.out.h3,"t.h4"=bootmi.out.h4,"t.h5"=bootmi.out.h5)
  a<-cbind(ret.out$coefficients,ret.out$se)
  colnames(a)<-c("Coef","SE")
  print(a)
  return(ret.out)
}

set.seed(1)
bootresults<-clustbootmi(dat,which(names(dat[[1]][[1]])=="orgId"),runs=10000,robust=TRUE)

## Skip to this point if using the bootstrapped standard errors included in the replication file.

# Table 1 ----
m1mod<-zelig(negot ~ maropps + ORGREB + terrcont + nonnegotyrs + nonnegotyrs2 + nonnegotyrs3, model="probit", data=dat$imputations)
m1_coef<-summary(m1mod)$coefficients[,1]
m2mod<-zelig(negot ~ outsup2 + ORGREB + terrcont + nonnegotyrs + nonnegotyrs2 + nonnegotyrs3, model="probit", data=dat$imputations)
m2_coef<-summary(m2mod)$coefficients[,1]
m3mod<-zelig(negot ~ RELORG + ORGREB + terrcont + nonnegotyrs + nonnegotyrs2 + nonnegotyrs3, model="probit", data=dat$imputations)
m3_coef<-summary(m3mod)$coefficients[,1]
m4mod<-zelig(negot ~ yearsest + legit2 + ORGREB + terrcont + nonnegotyrs + nonnegotyrs2 + nonnegotyrs3, model="probit", data=dat$imputations)
m4_coef<-summary(m4mod)$coefficients[,1]
m5mod<-zelig(negot ~ fract + ORGREB + terrcont + nonnegotyrs + nonnegotyrs2 + nonnegotyrs3, model="probit", data=dat$imputations)
m5_coef<-summary(m5mod)$coefficients[,1]

disp_boot(cbind(m1_coef,bootresults$se.h1),m1mod[[1]]$df.residual)
disp_boot(cbind(m2_coef,bootresults$se.h2),m2mod[[1]]$df.residual)
disp_boot(cbind(m3_coef,bootresults$se.h3),m3mod[[1]]$df.residual)
disp_boot(cbind(m4_coef,bootresults$se.h4),m4mod[[1]]$df.residual)
disp_boot(cbind(m5_coef,bootresults$se.h5),m5mod[[1]]$df.residual)

# Table 2 ----

m6mod<-zelig(negot ~ maropps + outsup2 + RELORG + yearsest + legit2 + fract + ORGREB + terrcont + nonnegotyrs + nonnegotyrs2 + nonnegotyrs3, model="probit", data=dat$imputations)
m6_coef<-summary(m6mod)$coefficients[,1]
m7mod<-zelig(negot ~ maropps + outsup2 + RELORG + yearsest + legit2 + fract + ORGREB + terrcont + negset.predmi + nonnegotyrs + nonnegotyrs2 + nonnegotyrs3, model="probit", data=dat$imputations)
m7_coef<-summary(m7mod)$coefficients[,1]

disp_boot(cbind(m6_coef,bootresults$se.nopred),m6mod[[1]]$df.residual)
disp_boot(cbind(m7_coef,bootresults$se),m7mod[[1]]$df.residual)

# Convex hull analysis ----

pct_inhull<-rep(NA,dat$m)
for(i in 1:dat$m){
  dat_fact<-dat[[1]][[i]][which(dat[[1]][[i]]$negot==1),]
  dat_counter<-dat[[1]][[i]][which(dat[[1]][[i]]$negot==0),]
  varlist<-c("terrgoal","numopps","outsup","milwing","ORGREB","guer","legal","polity","vioyrs")
  varnums<-match(varlist,names(dat[[1]][[1]]))
  dat_fact<-dat_fact[,varnums]
  dat_counter<-dat_counter[,varnums]
  cf_out<-whatif(data=dat_fact,cfact=dat_counter)
  pct_inhull[i]<-length(which(cf_out$in.hull==TRUE))/length(cf_out$in.hull)
}
mean(pct_inhull)

# Figure 1 (Substantive effects) ----

# Function to calculate substantive effects for multiple variables
calcsubeffects<-function(subvals,zmod,boot_t=NULL,dat=NULL) {
  require(Zelig)
  if(class(zmod)[1]=="MI") mod<-zmod[[1]] else mod<-zmod
  vars<-names(mod$coefficients)
  for(i in 2:length(vars)){
    eval(parse(text=paste("xlow.out<-setx(zmod,",vars[i],"=min(mod$data$",vars[i],"), data=dat)",sep="")))
    eval(parse(text=paste("xhigh.out<-setx(zmod,",vars[i],"=max(mod$data$",vars[i],"), data=dat)",sep="")))
    exp_low<-matrix(mod$family$linkinv(as.matrix(boot_t) %*% t(as.matrix(xlow.out))), nrow = nrow(as.matrix(boot_t)))
    exp_high<-matrix(mod$family$linkinv(as.matrix(boot_t) %*% t(as.matrix(xhigh.out))), nrow = nrow(as.matrix(boot_t)))
    fd<-exp_high-exp_low
    subvals[i,1]<-mean(fd)
    subvals[i,2]<-sd(fd)
    subvals[i,3]<-sort(fd)[(.05*length(fd))/2]
    subvals[i,4]<-sort(fd)[length(fd)-(.05*length(fd))/2]
    }
  rownames(subvals)<-vars
  names(subvals)<-c("fd","sd","ci-low","ci-high")
  return(subvals)
}

# Substantive effects with all other variables held at mean
subeffects<-as.data.frame(matrix(NA,nrow=1,ncol=4))
subeffects<-calcsubeffects(subeffects,m7mod,boot_t=bootresults$t,dat=NULL)
print(subeffects)

# Substantive effects with all other variables held at mean for cases at high risk for negotiation (> 10% predicted probability of negotiation)
subeffects_high<-as.data.frame(matrix(NA,nrow=1,ncol=4))
highobs<-which(m7mod[[1]]$fitted.values>.1)
subeffects_high<-calcsubeffects(subeffects_high,m7mod[[1]],boot_t=bootresults$t,dat=dat[[1]][[1]][highobs,])
print(subeffects_high)

# Out-of-sample prediction, Figure A2 ----

# NOTE: This out-of-sample predictive analysis takes a significant amount of time to complete. The output from this process is included in the enclosed replication files. To substitute the included predictive results instead of computing them locally, skip to the next section.

# Function to calculate out-of-sample prediction results for a given set of training and testing data 
calcpred<-function(trndat,tstdat,trainobs,testobs){
  predresults_per<-as.data.frame(matrix(data=NA,nrow=trndat$m,ncol=13))
  names(predresults_per)<-c("TotalAUC","BaselineAUC","lagdv","allcost","maropps","outsup2","RELORG","yearsest","legit2","fract","ORGREB","terrcont","negset.predmi")
  
  for(i in 1:trndat$m){
    train.dat<-trndat[[1]][[i]][trainobs,]
    test.dat<-tstdat[[1]][[i]][testobs,]
    
    trainmod<-brglm(negot ~ maropps + outsup2 + RELORG + yearsest + legit2 + fract + ORGREB + terrcont + negset.predmi + nonnegotyrs + nonnegotyrs2 + nonnegotyrs3,family=binomial(link="probit"),data=train.dat)
    modpred<-predict(trainmod,newdata=test.dat,type="response",se.fit=TRUE)
    predresults_per[i,1]<-rocplot(test.dat$negot,test.dat$negot,modpred$fit,modpred$fit,plot=FALSE)$area1
    trainmod<-brglm(negot ~ nonnegotyrs + nonnegotyrs2 + nonnegotyrs3,family=binomial(link="probit"),data=train.dat)
    modpred<-predict(trainmod,newdata=test.dat,type="response",se.fit=TRUE)
    predresults_per[i,2]<-rocplot(test.dat$negot,test.dat$negot,modpred$fit,modpred$fit,plot=FALSE)$area1
    trainmod<-brglm(negot ~ negot_lag,family=binomial(link="probit"),data=train.dat)
    modpred<-predict(trainmod,newdata=test.dat,type="response",se.fit=TRUE)
    badobs<-which(is.na(modpred$fit))
    predresults_per[i,3]<-rocplot(test.dat$negot[-badobs],test.dat$negot[-badobs],modpred$fit[-badobs],modpred$fit[-badobs],plot=FALSE)$area1
    trainmod<-brglm(negot ~ ORGREB + terrcont + negset.predmi + nonnegotyrs + nonnegotyrs2 + nonnegotyrs3,family=binomial(link="probit"),data=train.dat)
    modpred<-predict(trainmod,newdata=test.dat,type="response",se.fit=TRUE)
    predresults_per[i,4]<-rocplot(test.dat$negot,test.dat$negot,modpred$fit,modpred$fit,plot=FALSE)$area1
    varlist<-names(predresults_per)[5:ncol(predresults_per)]
    for(j in 1:length(varlist)){
      trainform<-paste("negot ~ ",paste(varlist[-j],collapse=" + "))
      trainmod<-brglm(trainform,family=binomial(link="probit"),data=train.dat)
      modpred<-predict(trainmod,newdata=test.dat,type="response",se.fit=TRUE)
      predresults_per[i,j+4]<-rocplot(test.dat$negot,test.dat$negot,modpred$fit,modpred$fit,plot=FALSE)$area1
    }
  }
  predresults_per_se<-predresults_per
  predresults_per_se[1:row(predresults_per),]<-0.5 
  return(as.vector(mi.meld(predresults_per,predresults_per_se)$q.mi))
}

# Function to calculate prediction results using k-fold cross-validation and bootstrapped confidence intervals; calls calcpred, above
bootpredmi<-function(dat,k=3,runs=10,bootn=100){
  kfolds<-createMultiFolds(dat[[1]][[1]]$negot,k=k,times=runs)
  totruns<-runs*k
  
  predresults_total<-as.data.frame(matrix(data=NA,nrow=totruns*bootn,ncol=13))
  names(predresults_total)<-c("TotalAUC","BaselineAUC","lagdv","allcost","maropps","outsup2","RELORG","yearsest","legit2","fract","ORGREB","terrcont","negset.predmi")
  
  for(a in 1:totruns){
    print(paste(a, " of ", totruns, " runs",sep=""))
    traindat<-dat
    testdat<-dat
    
    for(z in 1:dat$m){
      traindat[[1]][[z]]<-traindat[[1]][[z]][kfolds[[a]],]
      testdat[[1]][[z]]<-testdat[[1]][[z]][-kfolds[[a]],]
    }
    if(bootn>1) trainsamples<-createResample(traindat[[1]][[1]]$negot,bootn) else trainsamples<-c(1:nrow(traindat[[1]][[1]]))
    if(bootn>1) testsamples<-createResample(testdat[[1]][[1]]$negot,bootn) else testsamples<-c(1:nrow(testdat[[1]][[1]]))
    
    for(b in 1:bootn){
      print(paste(b, " of ", bootn, " bootstraps",sep=""))
      if(bootn>1) predresults_total[(((bootn)*(a-1))+b),]<-calcpred(traindat,testdat,trainsamples[[b]],testsamples[[b]])	 else predresults_total[(((bootn)*(a-1))+b),]<-calcpred(traindat,testdat,trainsamples,testsamples)	 
    }
  }
  return(predresults_total)
}

set.seed(1)
predoutput<-bootpredmi(dat,k=3,runs=10,bootn=100)

## Skip to this point if using the out-of-sample prediction results included in the replication file.

# AUC for individual models
colMeans(predoutput) 

# Function to view prediction results output in terms of the difference in predictive success compared to some baseline model
predformat_diff<-function(pout,k=3,runs=10,bootn=100,alpha=.05,basecol=2){
  statout<-rep(NA,ncol(pout)*3)
  statnames<-rep(NA,length(statout))
  for(i in 1:ncol(pout)){
    pt<-rep(NA,runs*k)
    cih<-rep(NA,runs*k)
    cil<-rep(NA,runs*k)
    
    for(j in 1:(runs*k)){
      stats<-pout[(((j-1)*bootn)+1):((((j-1)*bootn)+bootn)),i]-pout[(((j-1)*bootn)+1):((((j-1)*bootn)+bootn)),basecol]
      pt[j]<-mean(stats)
      cih[j]<-sort(stats)[round((1-(alpha/2))*length(stats))]
      cil[j]<-sort(stats)[ifelse(round((alpha/2)*length(stats))==0,1,round((alpha/2)*length(stats)))]
    }
    
    statout[(((i-1)*3)+1):(((i-1)*3)+3)]<-c(mean(pt),mean(cih),mean(cil))
    statnames[(((i-1)*3)+1):(((i-1)*3)+3)]<-c(names(pout)[i],paste(names(pout)[i],"_cih",sep=""),paste(names(pout)[i],"_cil",sep=""))
    
  }
  statout<-as.data.frame(matrix(statout,nrow=1,ncol=length(statout)))
  names(statout)<-statnames
  return(statout)
}

# Change in AUC from adding particular variables to an otherwise fully specified model
preddiffs_form<-predformat_diff(predoutput,k=3,runs=10,bootn=100,alpha=.05,basecol=1)
preddiffs_form<-preddiffs_form*-1
print(preddiffs_form) 

# External validity and mediation, Table A1 ----

mod_a1<-brglm(Negotiations..dyad. ~ maropps + anyrebmilsup + anypolsup + yearsest + highterrcont + modrebcap + nonnegotyrs + nonnegotyrs2 + nonnegotyrs3,family=binomial(link="probit"),data=ucdp)
summary(mod_a1)

mod_a2<-brglm(Negotiations..dyad. ~ med_all + maropps + anyrebmilsup + anypolsup + yearsest + highterrcont + modrebcap + nonnegotyrs + nonnegotyrs2 + nonnegotyrs3,family=binomial(link="probit"),data=ucdp)
summary(mod_a2)

ucdp_nomed<-ucdp[which(ucdp$nomed==1),]
ucdp_nomed$med_offer[which(is.na(ucdp_nomed$med_offer))]<-0
mod_a3<-brglm(Negotiations..dyad. ~ med_offer + maropps + anyrebmilsup + anypolsup + yearsest + highterrcont + modrebcap + nonnegotyrs + nonnegotyrs2 + nonnegotyrs3,family=binomial(link="probit"),data=ucdp_nomed)
summary(mod_a3)

# A bivariate probit model that treats mediation as an endogenous regressor also was estimated in Stata, using the following statement:
# biprobit (Negotiations__dyad_=med_all maropps anyrebmilsup anypolsup yearsest highterrcont modrebcap nonnegotyrs nonnegotyrs2 nonnegotyrs3) (med_all=maropps anyrebmilsup anypolsup yearsest highterrcont modrebcap cinc realgdp prevactive prevnegot extraterritorial)

# Additional robustness checks referenced in article and appendix ----

# Cluster-robust standard errors without bootstrap
m6mod<-zelig(negot ~ maropps + outsup2 + RELORG + yearsest + legit2 + fract + ORGREB + terrcont + nonnegotyrs + nonnegotyrs2 + nonnegotyrs3, model="probit", data=dat$imputations)
rcsemi(mod = m6mod,clust = "orgId",impdat = dat$imputations)

# Listwise-deletion instead of multiple imputation
mod_listwise<-glm(negot ~ maropps + outsup2 + RELORG + yearsest + legit2 + fract + ORGREB + terrcont + nonnegotyrs + nonnegotyrs2 + nonnegotyrs3, family=binomial(link="probit"), data=dat_orig)
robust(mod_listwise,dat_orig$orgId)

# Rare-events logit
mod_relogit<-zelig(negot ~ maropps + outsup2 + RELORG + yearsest + legit2 + fract + ORGREB + terrcont + nonnegotyrs + nonnegotyrs2 + nonnegotyrs3, model="relogit", data=dat$imputations)
bs.out<-NULL
ses.out<-NULL
rcseout<-NA
for(i in 1:length(mod_relogit)){
  md<-as.data.frame(mod_relogit[[i]]$data)
  modout<-zelig(negot ~ maropps + outsup2 + RELORG + yearsest + legit2 + fract + ORGREB + terrcont + nonnegotyrs + nonnegotyrs2 + nonnegotyrs3, model="relogit",data=md)
  clust<-md$orgId
  rcseout<-robust(modout,clust)
  if(is.null(bs.out)) bs.out<-rcseout[,1] else bs.out<-rbind(bs.out,rcseout[,1])
  if(is.null(ses.out)) ses.out<-rcseout[,2] else ses.out<-rbind(ses.out,rcseout[,2])
}
rcseout<-mi.meld(q=bs.out,se=ses.out)
rcseout<-cbind(rcseout$q.mi[1,],rcseout$se.mi[1,])
rcseout<-cbind(rcseout,(rcseout[,1]/rcseout[,2]),(2*pt(abs(rcseout[,1]/rcseout[,2]), df=df.residual(mod_relogit[[1]]),lower.tail=FALSE)))
colnames(rcseout)<-c("Estimate","Std. Error","z value","Pr(>|z|)")
class(rcseout)<-"coeftest"
rcseout

# Only non-violent outside military support to rebels
mod_nonviolent<-zelig(negot ~ maropps + outsup2_nv + RELORG + yearsest + legit2 + fract + ORGREB + terrcont + nonnegotyrs + nonnegotyrs2 + nonnegotyrs3, model="probit", data=dat$imputations)
rcsemi(mod = mod_nonviolent,clust = "orgId",impdat = dat$imputations)

# Alternative treatments of negotiation variable
# Coding only ORGSUCCESS = 1
for(i in 1:dat$m){ 
  dat[[1]][[i]]$negot_narrow[which(dat[[1]][[i]]$ORGSUCCESS<=0|dat[[1]][[i]]$ORGSUCCESS>1)]<-0
  dat[[1]][[i]]$negot_narrow[which(dat[[1]][[i]]$ORGSUCCESS==1)]<-1
}
mod_negot_narrow<-zelig(negot_narrow ~ maropps + outsup2 + RELORG + yearsest + legit2 + fract + ORGREB + terrcont + nonnegotyrs + nonnegotyrs2 + nonnegotyrs3,model="probit",data=dat$imputations)
rcsemi(mod_negot_narrow,clust="orgId",impdat=dat$imputations)

# Dropping ORGSUCCESS > 1 from data
dat_negot_narrow<-dat
for(i in 1:dat_negot_narrow$m){ 
  dat_negot_narrow[[1]][[i]]<-dat_negot_narrow[[1]][[i]][which(dat_negot_narrow[[1]][[i]]$ORGSUCCESS<2),]
}
mod_negot_narrow2<-zelig(negot ~ maropps + outsup2 + RELORG + yearsest + legit2 + fract + ORGREB + terrcont + nonnegotyrs + nonnegotyrs2 + nonnegotyrs3,model="probit",data=dat_negot_narrow$imputations)
rcsemi(mod_negot_narrow2,clust="orgId",impdat=dat_negot_narrow$imputations)

# Negotiation history
mod_negot_hist<-zelig(negot ~ maropps + outsup2 + RELORG + yearsest + legit2 + fract + ORGREB + terrcont + negset.predmi + cumnegot + cumnegot:maropps + nonnegotyrs + nonnegotyrs2 + nonnegotyrs3,model="probit",data=dat$imputations)
rcsemi(mod_negot_hist,clust="orgId",impdat=dat$imputations)
# Calculate min-max second difference
x.low.low<-setx(mod_negot_hist,maropps=0,cumnegot=0)
x.low.high<-setx(mod_negot_hist,maropps=0,cumnegot=45)
x.high.low<-setx(mod_negot_hist,maropps=8,cumnegot=0)
x.high.high<-setx(mod_negot_hist,maropps=8,cumnegot=45)
s1.out<-sim(mod_negot_hist,x=x.low.low,x1=x.low.high)
s2.out<-sim(mod_negot_hist,x=x.high.low,x1=x.high.high)
ddiff<-s2.out$qi$fd-s1.out$qi$fd
mean(ddiff)
quantile(ddiff,1-((1-(90/100))/2))
quantile(ddiff,((1-(90/100))/2))

# Democracy
# Interaction between polity variable and reputation
mod_dem_rep<-zelig(negot ~ maropps + outsup2 + RELORG + yearsest + legit2 + fract + ORGREB + terrcont + maropps:polity + nonnegotyrs + nonnegotyrs2 + nonnegotyrs3 + polity,model="probit",data=dat$imputations)
rcsemi(mod_dem_rep,clust="orgId",impdat=dat$imputations)
# Polity variable
mod_dem<-zelig(negot ~ maropps + outsup2 + RELORG + yearsest + legit2 + fract + ORGREB + terrcont + nonnegotyrs + nonnegotyrs2 + nonnegotyrs3 + polity,model="probit",data=dat$imputations)
rcsemi(mod_dem,clust="orgId",impdat=dat$imputations)

# Longevity of opposition groups
# Dropping long-standing opposition groups from the data
dat_neworgs<-dat
for(i in 1:dat_neworgs$m){ dat_neworgs[[1]][[i]]<-dat_neworgs[[1]][[i]][which(dat_neworgs[[1]][[i]]$yearsest<=10),] }
mod_neworgs<-zelig(negot ~ maropps + outsup2 + RELORG + yearsest + legit2 + fract + ORGREB + terrcont + nonnegotyrs + nonnegotyrs2 + nonnegotyrs3,model="probit",data=dat_neworgs$imputations)
rcsemi(mod_neworgs,clust="orgId",impdat=dat_neworgs$imputations)
# Including only long-standing opposition groups
dat_oldorgs<-dat
for(i in 1:dat_oldorgs$m){ dat_oldorgs[[1]][[i]]<-dat_oldorgs[[1]][[i]][which(dat_oldorgs[[1]][[i]]$yearsest>10),] }
mod_oldorgs<-zelig(negot ~ maropps + outsup2 + RELORG + yearsest + legit2 + fract + ORGREB + terrcont + nonnegotyrs + nonnegotyrs2 + nonnegotyrs3,model="probit",data=dat_oldorgs$imputations)
rcsemi(mod_oldorgs,clust="orgId",impdat=dat_oldorgs$imputations)

# Spatial diffusion
# Number of negotiations outside the state in question
mod_othnegot<-zelig(negot ~ maropps + outsup2 + RELORG + yearsest + legit2 + fract + ORGREB + terrcont + nonnegotyrs + nonnegotyrs2 + nonnegotyrs3 + othnegot,model="probit",data=dat$imputations)
rcsemi(mod_othnegot,clust="orgId",impdat=dat$imputations)
# Number of negotiations outside the state in question, lagged one year
mod_othnegot_lag<-zelig(negot ~ maropps + outsup2 + RELORG + yearsest + legit2 + fract + ORGREB + terrcont + nonnegotyrs + nonnegotyrs2 + nonnegotyrs3 + othnegot_lag,model="probit",data=dat$imputations)
rcsemi(mod_othnegot_lag,clust="orgId",impdat=dat$imputations)
# Number of negotiations by opposition group outside the state in question 
mod_orgnegot<-zelig(negot ~ maropps + outsup2 + RELORG + yearsest + legit2 + fract + ORGREB + terrcont + nonnegotyrs + nonnegotyrs2 + nonnegotyrs3 + orgnegot,model="probit",data=dat$imputations)
rcsemi(mod_orgnegot,clust="orgId",impdat=dat$imputations)
# Number of negotiations by opposition group outside the state in question, lagged one year
mod_orgnegot_lag<-zelig(negot ~ maropps + outsup2 + RELORG + yearsest + legit2 + fract + ORGREB + terrcont + nonnegotyrs + nonnegotyrs2 + nonnegotyrs3 + orgnegot_lag,model="probit",data=dat$imputations)
rcsemi(mod_orgnegot_lag,clust="orgId",impdat=dat$imputations)
# Number of negotiations conducted by the government outside the dyad in question
mod_statenegot<-zelig(negot ~ maropps + outsup2 + RELORG + yearsest + legit2 + fract + ORGREB + terrcont + nonnegotyrs + nonnegotyrs2 + nonnegotyrs3 + statenegot,model="probit",data=dat$imputations)
rcsemi(mod_statenegot,clust="orgId",impdat=dat$imputations)


